vp_raw <- read.csv("mega-veridicality-v2.csv")
emotion_raw <- read.csv("BRM-emot-submit.csv")
emotion_select <- select(emotion_raw, "Word", "V.Mean.Sum", "V.SD.Sum", "A.Mean.Sum", "A.SD.Sum")
vp_data <- select(vp_raw, "participant", "verb", "frame", "voice", "polarity", "conditional", "sentence", "veridicality", "acceptability", "exclude")
Norming valence mean to get positive and negative valence
emotion <- emotion_select %>%
mutate(valence_scaled = scale(V.Mean.Sum))
word_list <- unique(emotion$Word)
# Need to filter by acceptability (see 2019 paper)
# Need to normalize by participants (2016 paper used "ordinal model-based normalization procedure")
vp_data <- vp_data %>%
mutate(veridicality_num = ifelse(veridicality == "yes", 1, ifelse(veridicality == "no", -1, 0))) %>%
filter(exclude == "False") %>%
mutate(Word = verb) %>%
filter(Word %in% word_list)
create relevant subset for veridicality ratings
example of items
| that_S |
active |
Someone cared that a particular thing happened. Did that thing happen? |
| that_S |
passive |
Someone was revolted that a particular thing happened. Did that thing happen? |
| to_VPeventive |
active |
A particular person loved to do a particular thing. Did that person do that thing? |
| to_VPeventive |
passive |
A particular person was seen to do a particular thing. Did that person do that thing? |
| to_VPstative |
active |
A particular person opted to have a particular thing. Did that person have that thing? |
| to_VPstative |
passive |
A particular person was inspired to have a particular thing. Did that person have that thing? |
| for_NP_to_VP |
active |
Someone pressed for a particular thing to happen. Did that thing happen? |
| for_NP_to_VP |
passive |
N/A |
| NP_to_VPeventive |
active |
Someone contracted a particular person to do a particular thing. Did that person have that thing? |
| NP_to_VPeventive |
passive |
N/A |
| NP_to_VPstative |
active |
Someone badgered a particular person to have a particular thing. Did that person have that thing? |
| NP_to_VPstative |
passive |
N/A |
# the White paper (2016) uses only observations with case "that_S" to calculate veridicality, not sure if this is important
veridicality_filter <- vp_data %>%
filter(polarity == "positive" & conditional == "False")
# veridicality_ratings <- veridicality_filter %>%
# multi_boot_standard(col = "veridicality_num") %>%
# mutate(veridicality_mean = mean, YMin = mean - ci_lower, YMax = mean + ci_upper) %>%
# ungroup(Word, voice) %>%
# mutate(Word = fct_reorder(as.factor(Word), mean))
veridicality_ratings <- veridicality_filter %>%
group_by(Word, voice) %>%
# multi_boot_standard(col = "veridicality_num") %>%
summarise(veridicality_mean = mean(veridicality_num)) %>%
ungroup() %>%
mutate(Word = fct_reorder(Word, veridicality_mean))
veridicality <- merge(veridicality_ratings, emotion, by = "Word", all.x = TRUE)
veridicality <- veridicality %>%
rename(valence_mean = valence_scaled, arousal_mean = A.Mean.Sum, valence_SD = V.SD.Sum, arousal_SD = A.SD.Sum) %>%
mutate(valence_group = ifelse(valence_mean < 0, "negative", "positive"))
ggplot(data = veridicality, aes(x = valence_mean, y = veridicality_mean, colour = valence_group, label = Word)) +
#geom_point(width = .3,height = .025) +
geom_label() +
geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'

ggplot(data = veridicality, aes(x = arousal_mean, y = veridicality_mean, colour = valence_group, label = Word)) +
#geom_point(width = .3,height = .025) +
facet_grid(~valence_group) +
geom_label() +
geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'

ggplot(data = veridicality, aes(x = arousal_mean, y = veridicality_mean, colour = valence_group, label = Word)) +
#geom_point(width = .3,height = .025) +
facet_grid(voice~valence_group) +
geom_label() +
geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'

{r} # veridicality$relativeValence = abs(veridicality$valence_mean) # m = lm(mean ~ relativeValence + arousal_mean, data = veridicality) # summary(m) #
create relevant subset for projectivity ratings
examples of items:
| that_S |
active |
positive |
If John didn’t find that a particular thing happened, did that thing happen? |
arousal low, projectivity low |
| that_S |
passive |
negative |
If John wasn’t scared that a particular thing happened, did that thing happen? |
arousal high, projectivity high |
| that_S |
active |
positive |
If John didn’t discover that a particular thing happened, did that thing happen? |
outlier: arousal high, projectivity low |
| that_S |
passive |
positive |
If John wasn’t jarred that a particular thing happened, did that thing happen? |
outlier: arousal low, projectivity high |
| that_S |
active |
negative |
If John didn’t resent that a particular thing happened, did that thing happen? |
outlier: arousal low, projectivity high |
| that_S |
passive |
negative |
If John wasn’t tricked that a particular thing happened, did that thing happen? |
outlier: arousal high, projectivity low |
projectivity_filter <- vp_data %>%
filter(polarity == "negative" & conditional == "True")
# projectivity_ratings <- projectivity_filter %>%
# multi_boot_standard(col = "veridicality_num") %>%
# mutate(projectivity_mean = mean, YMin = mean - ci_lower, YMax = mean + ci_upper) %>%
# ungroup(Word, voice) %>%
# mutate(Word = fct_reorder(as.factor(Word), mean))
projectivity_ratings <- projectivity_filter %>%
group_by(Word, voice) %>%
# multi_boot_standard(col = "veridicality_num") %>%
summarise(projectivity_mean = mean(veridicality_num)) %>%
ungroup() %>%
mutate(Word = fct_reorder(Word, projectivity_mean))
projectivity <- merge(projectivity_ratings, emotion, by = "Word", all.x = TRUE)
projectivity <- projectivity %>%
rename(valence_mean = valence_scaled, arousal_mean = A.Mean.Sum, valence_SD = V.SD.Sum, arousal_SD = A.SD.Sum) %>%
mutate(valence_group = ifelse(valence_mean < 0, "negative", "positive"))
ggplot(data = projectivity, aes(x = valence_mean, y = projectivity_mean, colour = valence_group, label = Word)) +
#geom_point(width = .3,height = .025) +
geom_label() +
geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'

ggplot(data = projectivity, aes(x = arousal_mean, y = projectivity_mean, colour = valence_group, label = Word)) +
#geom_point(width = .3,height = .025) +
facet_grid(~valence_group) +
geom_label() +
geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'

# taking into account voice
ggplot(data = projectivity, aes(x = arousal_mean, y = projectivity_mean, colour = valence_group, label = Word)) +
#geom_point(width = .3,height = .025) +
facet_grid(voice~valence_group) +
geom_label() +
geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'

projectivity against arousal_mean, by valence_bins
projectivity_neg <- projectivity %>%
filter(valence_group == "negative")
projectivity_neg <-projectivity_neg[order(projectivity_neg$valence_mean),]
projectivity_neg[c("valence_bin")] <- 0
sep_neg = nrow(projectivity_neg) / 3 # 63.66667
for (i in 1:nrow(projectivity_neg)) {
if (i <= sep_neg) {
projectivity_neg[i, "valence_bin"] <- "very negative" #contains 63 items
} else if (i <= 2 * sep_neg) {
projectivity_neg[i, "valence_bin"] <- "moderately negative" # contains 64 items
} else {
projectivity_neg[i, "valence_bin"] <- "slightly negative" # contains 64 items
}
}
projectivity_pos <- projectivity %>%
filter(valence_group == "positive")
projectivity_pos <-projectivity_pos[order(projectivity_pos$valence_mean),]
projectivity_pos[c("valence_bin")] <- 0
sep_pos = nrow(projectivity_pos) / 3 # 77.3333
for (i in 1:nrow(projectivity_pos)) {
if (i <= sep_pos) {
projectivity_pos[i, "valence_bin"] <- "slightly positive" # contains 77 items
} else if (i <= 2 * sep_pos) {
projectivity_pos[i, "valence_bin"] <- "moderately positive" # contains 77 items
} else {
projectivity_pos[i, "valence_bin"] <- "very positive" # contains 78 items
}
}
projectivity_bin <- rbind(projectivity_neg, projectivity_pos)
projectivity_bin$valence_bin_f = factor(projectivity_bin$valence_bin, levels=c("very negative", "moderately negative", "slightly negative", "slightly positive", "moderately positive", "very positive"))
ggplot(data = projectivity_bin, aes(x = arousal_mean, y = projectivity_mean, colour = valence_bin_f, label = Word)) +
#geom_point(width = .3,height = .025) +
facet_grid(~valence_bin_f) +
geom_label() +
geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'

# taking into account voice
ggplot(data = projectivity_bin, aes(x = arousal_mean, y = projectivity_mean, colour = valence_bin_f, label = Word)) +
#geom_point(width = .3,height = .025) +
facet_grid(voice~valence_bin_f) +
geom_label() +
geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'

{r} # projectivity$relativeValence = abs(projectivity$valence_mean) # m = lm(mean ~ relativeValence + arousal_mean, data = projectivity) # summary(m) #
projectivity_ratings_participant <- projectivity_filter
projectivity_participant <- merge(projectivity_ratings_participant, emotion, by = "Word", all.x = TRUE)
projectivity_participant <- projectivity_participant %>%
rename(valence_mean = valence_scaled, arousal_mean = A.Mean.Sum, valence_SD = V.SD.Sum, arousal_SD = A.SD.Sum) %>%
mutate(valence_group = ifelse(valence_mean < 0, "negative", "positive"))
# the model check of the model as previously formulated (ie, with uncentered predictors) suggested huge collinearity, almost all values in covariance matrix > .4). the current model check still suggests there's sth wonky going on with the homoscedasticity tests (because the outcome variable is a three-way categorical variable instead of continuous, which we can't do anything about within the model, but seee logistic model below that replicates the result).
projectivity_participant$relativeValence = abs(projectivity_participant$valence_mean)
# visualize correlations between variables
pairscor.fnc(projectivity_participant[,c("veridicality_num","arousal_mean","relativeValence","valence_mean")])

# center predictors to reduce collinearity
projectivity_participant = projectivity_participant %>%
mutate(crelativeValence = relativeValence-mean(relativeValence),carousal_mean=arousal_mean-mean(arousal_mean))
m = lmer(veridicality_num ~ carousal_mean * crelativeValence + (1 | participant) + (1 | Word), data = projectivity_participant)
summary(m) # main effects of arousal and relative valence!
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## veridicality_num ~ carousal_mean * crelativeValence + (1 | participant) +
## (1 | Word)
## Data: projectivity_participant
##
## REML criterion at convergence: 6062.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7903 -0.5219 -0.0759 0.5784 3.6057
##
## Random effects:
## Groups Name Variance Std.Dev.
## Word (Intercept) 0.05401 0.2324
## participant (Intercept) 0.04782 0.2187
## Residual 0.18567 0.4309
## Number of obs: 4450, groups: Word, 423; participant, 160
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.211080 0.022423 9.413
## carousal_mean 0.054950 0.016052 3.423
## crelativeValence 0.182165 0.023798 7.655
## carousal_mean:crelativeValence 0.008049 0.022795 0.353
##
## Correlation of Fixed Effects:
## (Intr) crsl_m crltvV
## carousal_mn 0.060
## crelatvVlnc 0.046 -0.361
## crsl_mn:crV -0.245 -0.214 -0.202
pdf(file="modelcheck_linear.pdf",height=8,width=9)
check_model(m)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 4450 rows containing missing values (geom_text_repel).
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen
## 2
# create a categorical projectivity variable that treats only "yes" responses as projective, all others as not projective
projectivity_participant$categorical_projectivity = as.factor(ifelse(projectivity_participant$veridicality_num == 1, "projective","non-projective"))
# almost three times as many non-projective compared to projective responses
table(projectivity_participant$categorical_projectivity)
##
## non-projective projective
## 3176 1274
prop.table(table(projectivity_participant$categorical_projectivity))
##
## non-projective projective
## 0.7137079 0.2862921
m = glmer(categorical_projectivity ~ carousal_mean * crelativeValence + (1 | participant) + (1 | Word), data = projectivity_participant, family="binomial")
summary(m) # main effects of arousal and relative valence!
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: categorical_projectivity ~ carousal_mean * crelativeValence +
## (1 | participant) + (1 | Word)
## Data: projectivity_participant
##
## AIC BIC logLik deviance df.resid
## 4045.9 4084.3 -2016.9 4033.9 4444
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1072 -0.4152 -0.2109 0.3425 8.4193
##
## Random effects:
## Groups Name Variance Std.Dev.
## Word (Intercept) 2.719 1.649
## participant (Intercept) 1.707 1.307
## Number of obs: 4450, groups: Word, 423; participant, 160
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.68140 0.15224 -11.045 < 2e-16 ***
## carousal_mean 0.49379 0.11782 4.191 2.78e-05 ***
## crelativeValence 1.26659 0.17407 7.276 3.43e-13 ***
## carousal_mean:crelativeValence -0.08131 0.16765 -0.485 0.628
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) crsl_m crltvV
## carousal_mn 0.026
## crelatvVlnc -0.028 -0.305
## crsl_mn:crV -0.238 -0.269 -0.216